{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 02 Analyzing a cumulative relative frequency graph (optional)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%%html\n", "" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from pandas import Series, DataFrame\n", "import matplotlib.pyplot as plt\n", "from scipy import stats" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[khanacademy](https://www.khanacademy.org/math/ap-statistics/density-curves-normal-distribution-ap/percentiles-cumulative-relative-frequency/v/analyzing-a-cumulative-relative-frequency-graph)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[datasciencemadesimple](http://www.datasciencemadesimple.com/percentile-rank-column-pandas-python-2/)\n", "[pandas.DataFrame.rank](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rank.html)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "data = {\n", " 'Name':['George','Andrea','micheal','maggie','Ravi','Xien','Jalpa'],\n", " 'Mathematics_score':[62,47,55,74,32,77,86]\n", "}" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "df = DataFrame(data)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "df['Rank'] = df['Mathematics_score'].rank()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "df['Percentile_rank'] = df['Mathematics_score'].rank(pct=True)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameMathematics_scoreRankPercentile_rank
0George624.00.571429
1Andrea472.00.285714
2micheal553.00.428571
3maggie745.00.714286
4Ravi321.00.142857
5Xien776.00.857143
6Jalpa867.01.000000
\n", "
" ], "text/plain": [ " Name Mathematics_score Rank Percentile_rank\n", "0 George 62 4.0 0.571429\n", "1 Andrea 47 2.0 0.285714\n", "2 micheal 55 3.0 0.428571\n", "3 maggie 74 5.0 0.714286\n", "4 Ravi 32 1.0 0.142857\n", "5 Xien 77 6.0 0.857143\n", "6 Jalpa 86 7.0 1.000000" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "df = df.sort_values('Mathematics_score')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.plot(df['Mathematics_score'], df['Percentile_rank'], '-o')\n", "plt.xlabel('Mathematics Scores')\n", "plt.ylabel('Percentile Rank')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "# TODO: study the empirical CDF (ECDF) and how it relates to the percentile-rank curve plotted above" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "References:\n", "\n", "1. [Calculating percentile](https://github.com/neopanther/statistics/blob/master/4%20Modeling%20data%20distributions_khan/1%20Calculating%20percentile.ipynb)\n", "2. [Analyzing a cumulative relative frequency graph](https://github.com/neopanther/statistics/blob/master/4%20Modeling%20data%20distributions_khan/2%20Analyzing%20a%20cumulative%20relative%20frequency%20graph.ipynb)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 4 }